Google Maps Email Scraper Template

工作流概述

这是一个包含26个节点的复杂工作流,用于根据查询列表自动搜索 Google Maps 上的商家、抓取其网站页面中的电子邮件地址,并将结果保存到 Google Sheets。

工作流源代码

下载
{
  "name": "Google Maps Email Scraper Template",
  "tags": [],
  "nodes": [
    {
      "id": "79df5316-c210-478d-a4de-35b5d31924ee",
      "name": "Remove Duplicate URLs",
      "type": "n8n-nodes-base.removeDuplicates",
      "position": [
        -780,
        380
      ],
      "parameters": {},
      "typeVersion": 1.1
    },
    {
      "id": "985ac7e3-b501-4079-a043-780677c94b52",
      "name": "Loop over queries",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -1080,
        -100
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "3a478935-781b-4fb1-bdc7-fcf8be1334bc",
      "name": "Search Google Maps with query",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -1380,
        380
      ],
      "parameters": {
        "url": "=https://www.google.com/maps/search/{{ $json.query }}",
        "options": {
          "allowUnauthorizedCerts": false
        }
      },
      "executeOnce": false,
      "typeVersion": 4.2,
      "alwaysOutputData": false
    },
    {
      "id": "477e7d55-b7d6-4b20-ac44-dd1f443e270a",
      "name": "Scrape URLs from results",
      "type": "n8n-nodes-base.code",
      "position": [
        -1180,
        380
      ],
      "parameters": {
        "jsCode": "// Collect every scheme+host origin (e.g. https://example.com) found in the
// data field of the first input item and emit one item per origin.
const data = $input.first().json.data

const regex = /https?:\/\/[^\/]+/g

// String.prototype.match returns null when nothing matches, and data is not a
// string when the upstream response carried no text body. Fall back to an
// empty list so the node emits zero items instead of throwing on null.map.
const urls = (typeof data === 'string' ? data.match(regex) : null) || []

return urls.map(url => ({json: {url: url}}))"
      },
      "typeVersion": 2
    },
    {
      "id": "a5b67e45-a3f6-41d2-aa58-c26a441c41b2",
      "name": "Filter irrelevant URLs",
      "type": "n8n-nodes-base.filter",
      "position": [
        -980,
        380
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "041797f2-2fe2-41dc-902a-d34050b9b304",
              "operator": {
                "type": "string",
                "operation": "notRegex"
              },
              "leftValue": "={{ $json.url }}",
              "rightValue": "=(google|gstatic|ggpht|schema\.org|example\.com|sentry-next\.wixpress\.com|imli\.com|sentry\.wixpress\.com|ingest\.sentry\.io)"
            },
            {
              "id": "eb499a7e-17bc-453c-be08-a47286f726dd",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "40ec6d1f-1c98-4c9f-8499-c5893c3df7b9",
      "name": "Request web page for URL",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        -380,
        460
      ],
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {}
      },
      "typeVersion": 4.2,
      "alwaysOutputData": false
    },
    {
      "id": "12f662a8-c55f-409a-b381-f37ab6dd3794",
      "name": "Loop over URLs",
      "type": "n8n-nodes-base.splitInBatches",
      "onError": "continueErrorOutput",
      "position": [
        -580,
        380
      ],
      "parameters": {
        "options": {
          "reset": false
        }
      },
      "typeVersion": 3
    },
    {
      "id": "e6957d05-3533-48ae-9cc1-ee4ac026a2a6",
      "name": "Loop over pages",
      "type": "n8n-nodes-base.splitInBatches",
      "onError": "continueErrorOutput",
      "position": [
        -360,
        120
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3,
      "alwaysOutputData": false
    },
    {
      "id": "018621c0-0ea9-4865-b110-b6d0727f0588",
      "name": "Scrape emails from page",
      "type": "n8n-nodes-base.code",
      "onError": "continueRegularOutput",
      "position": [
        -200,
        220
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Pull every email-like string out of the data field of the current item.
const data = $json.data

// The negative lookahead rejects candidates whose final label starts with an
// image extension, e.g. logo@2x.png.
const emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.(?!png|jpg|gif|jpeg)[a-zA-Z]{2,}/g

// data may be missing (the upstream request node continues on error), and
// match() returns null when no address is found. Emit an empty array in both
// cases so downstream nodes never receive a null emails value.
const emails = typeof data === 'string' ? (data.match(emailRegex) || []) : []

return {json: {emails: emails}}"
      },
      "typeVersion": 2
    },
    {
      "id": "5509b8e2-a6fc-4fbe-bbc5-1bc1d5de1c98",
      "name": "Aggregate arrays of emails",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        -40,
        100
      ],
      "parameters": {
        "options": {
          "mergeLists": true
        },
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "emails"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "f1f01f03-b62e-453f-b938-ffe4f9b3f4de",
      "name": "Split out into default data structure",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        180,
        100
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "emails"
      },
      "typeVersion": 1
    },
    {
      "id": "ec27d665-d9c1-4f10-9c52-0d5ea89cbf77",
      "name": "Remove duplicate emails",
      "type": "n8n-nodes-base.removeDuplicates",
      "position": [
        400,
        100
      ],
      "parameters": {
        "compare": "selectedFields",
        "options": {},
        "fieldsToCompare": "emails"
      },
      "typeVersion": 1.1
    },
    {
      "id": "4a071bf0-23ad-455b-b231-bafd3b32e4f8",
      "name": "Filter irrelevant emails",
      "type": "n8n-nodes-base.filter",
      "position": [
        600,
        100
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "041797f2-2fe2-41dc-902a-d34050b9b304",
              "operator": {
                "type": "string",
                "operation": "notRegex"
              },
              "leftValue": "={{ $json.emails }}",
              "rightValue": "=(google|gstatic|ggpht|schema\.org|example\.com|sentry\.wixpress\.com|sentry-next\.wixpress\.com|ingest\.sentry\.io|sentry\.io|imli\.com)"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "59675faa-2b0d-4ba5-82c7-dc5dedcad31e",
      "name": "Save emails to Google Sheet",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        800,
        100
      ],
      "parameters": {
        "columns": {
          "value": {
            "Emails": "={{ $json.emails }}"
          },
          "schema": [
            {
              "id": "Emails",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Emails",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "Emails"
          ]
        },
        "options": {},
        "operation": "append"
      },
      "typeVersion": 4.5
    },
    {
      "id": "93437e8b-4f8d-40a1-9585-cab1b556164a",
      "name": "Starts scraper workflow",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -1600,
        380
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "eed77477-777d-450d-a975-4d2848b1cf55",
      "name": "Run workflow",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1320,
        -100
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "dffaf04e-d1d2-4002-9a69-f0904b61fc2d",
      "name": "Wait between executions",
      "type": "n8n-nodes-base.wait",
      "position": [
        -700,
        0
      ],
      "webhookId": "40eb11a9-0f7d-4932-993e-0052b69dbf9b",
      "parameters": {
        "amount": 2
      },
      "typeVersion": 1.1
    },
    {
      "id": "18787007-1d11-41b9-89c3-d5f69756eda7",
      "name": "Execute scraper for query",
      "type": "n8n-nodes-base.executeWorkflow",
      "position": [
        -880,
        0
      ],
      "parameters": {
        "mode": "each",
        "options": {
          "waitForSubWorkflow": false
        },
        "workflowId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $workflow.id }}"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "67fcde25-05e4-437c-b799-4448baea7891",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2280,
        -140
      ],
      "parameters": {
        "color": 5,
        "width": 740,
        "height": 180,
        "content": "## 🛠 Setup
1. Set up your list of queries in the \"Run workflow\" manual trigger node. Watch this [video](https://youtu.be/HaiO-UeiKBA) on how to generate the queries with ChatGPT.
2. Choose a sheet to populate with data in the **Google Sheets node**
3. Run the workflow and start getting leads into your Google Sheets document"
      },
      "typeVersion": 1
    },
    {
      "id": "ac880457-44b4-4ff7-8440-b4107f8468bb",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -700,
        -120
      ],
      "parameters": {
        "color": 6,
        "height": 100,
        "content": "**Optional** 👇
Set wait time between each query workflow execution. Default is 2 seconds."
      },
      "typeVersion": 1
    },
    {
      "id": "d83afb3d-7b71-4b47-9b50-28837aac408c",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1600,
        260
      ],
      "parameters": {
        "width": 480,
        "height": 100,
        "content": "### Scraper 👇
This workflow will be executed in the background for each query. Click the **All executions** tab in the left sidebar to see the executions running."
      },
      "typeVersion": 1
    },
    {
      "id": "007b621a-3d41-4358-aa45-560a3c8e3414",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        820,
        300
      ],
      "parameters": {
        "color": 4,
        "height": 180,
        "content": "👆 
1. Setup your **credentials**. Here's a [video tutorial](https://youtu.be/O5RnWDM27M8) on how to do that.

2. Choose which document and sheet to save the scraped emails to. "
      },
      "typeVersion": 1
    },
    {
      "id": "fc0b837f-624c-4d25-8ed7-f787f76c785b",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1760,
        -360
      ],
      "parameters": {
        "color": 3,
        "content": " ## ⚠️ Note

A [video tutorial](https://youtu.be/HaiO-UeiKBA) for this workflow guide is available on my [Youtube channel](https://www.youtube.com/channel/UCn8xmUBunez1SsDVRfZDUGA)"
      },
      "typeVersion": 1
    },
    {
      "id": "2f8665d5-2890-4f7d-908b-9c09a66b6c93",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2280,
        -360
      ],
      "parameters": {
        "color": 7,
        "width": 480,
        "height": 140,
        "content": "## Google Maps Automatic Email Scraper

This workflow automatically scrapes emails from businesses on Google Maps based on a list of queries that you provide."
      },
      "typeVersion": 1
    },
    {
      "id": "7414b2ed-259d-47da-bbd1-d9ce0d64d43c",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1000,
        540
      ],
      "parameters": {
        "color": 6,
        "width": 160,
        "height": 100,
        "content": "**Optional** 👆
Add or change the regex for filtering irrelevant URLs."
      },
      "typeVersion": 1
    },
    {
      "id": "789c9a02-e6e7-4ea6-a7a2-acc7715b377a",
      "name": "Sticky Note8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        580,
        260
      ],
      "parameters": {
        "color": 6,
        "width": 200,
        "height": 100,
        "content": "**Optional** 👆
Add or change the regex for filtering irrelevant/incorrect email addresses."
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {
    "Run workflow": [
      {
        "json": {
          "query": "hollywood+dentist"
        }
      },
      {
        "json": {
          "query": "downtown+los+angeles+dentist"
        }
      },
      {
        "json": {
          "query": "santa+monica+dentist"
        }
      },
      {
        "json": {
          "query": "westwood+dentist"
        }
      },
      {
        "json": {
          "query": "west+l.a.+dentist"
        }
      },
      {
        "json": {
          "query": "the+valley+dentist"
        }
      },
      {
        "json": {
          "query": "echo+park+dentist"
        }
      },
      {
        "json": {
          "query": "culver+city+dentist"
        }
      },
      {
        "json": {
          "query": "pasadena+dentist"
        }
      },
      {
        "json": {
          "query": "silver+lake+dentist"
        }
      },
      {
        "json": {
          "query": "mid-wilshire+dentist"
        }
      },
      {
        "json": {
          "query": "beverly+hills+dentist"
        }
      },
      {
        "json": {
          "query": "north+hills+dentist"
        }
      },
      {
        "json": {
          "query": "south+los+angeles+dentist"
        }
      }
    ]
  },
  "settings": {
    "executionOrder": "v1"
  },
  "connections": {
    "Run workflow": {
      "main": [
        [
          {
            "node": "Loop over queries",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop over URLs": {
      "main": [
        [
          {
            "node": "Loop over pages",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Request web page for URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop over pages": {
      "main": [
        [
          {
            "node": "Aggregate arrays of emails",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Scrape emails from page",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop over queries": {
      "main": [
        [],
        [
          {
            "node": "Execute scraper for query",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Remove Duplicate URLs": {
      "main": [
        [
          {
            "node": "Loop over URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter irrelevant URLs": {
      "main": [
        [
          {
            "node": "Remove Duplicate URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Remove duplicate emails": {
      "main": [
        [
          {
            "node": "Filter irrelevant emails",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape emails from page": {
      "main": [
        [
          {
            "node": "Loop over pages",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Starts scraper workflow": {
      "main": [
        [
          {
            "node": "Search Google Maps with query",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait between executions": {
      "main": [
        [
          {
            "node": "Loop over queries",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter irrelevant emails": {
      "main": [
        [
          {
            "node": "Save emails to Google Sheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Request web page for URL": {
      "main": [
        [
          {
            "node": "Loop over URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape URLs from results": {
      "main": [
        [
          {
            "node": "Filter irrelevant URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Execute scraper for query": {
      "main": [
        [
          {
            "node": "Wait between executions",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate arrays of emails": {
      "main": [
        [
          {
            "node": "Split out into default data structure",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Search Google Maps with query": {
      "main": [
        [
          {
            "node": "Scrape URLs from results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split out into default data structure": {
      "main": [
        [
          {
            "node": "Remove duplicate emails",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

功能特点

  • 按查询列表批量搜索 Google Maps
  • 从搜索结果中提取商家网站 URL 并去重
  • 抓取网页中的电子邮件地址
  • 通过正则表达式过滤无关的 URL 和邮箱
  • 将结果追加保存到 Google Sheets

技术分析

节点类型及作用

  • removeDuplicates:去除重复的 URL 和电子邮件地址
  • splitInBatches:循环处理查询、URL 和页面
  • httpRequest:请求 Google Maps 搜索结果和商家网页
  • code:使用正则表达式提取 URL 和电子邮件地址
  • filter:过滤无关的 URL 和电子邮件地址

复杂度评估

配置难度:
★★★★☆
维护难度:
★★☆☆☆
扩展性:
★★★★☆

实施指南

前置条件

  • 有效的Google账户
  • n8n平台访问权限
  • Google Sheets API凭证
  • 用于保存结果的Google Sheets表格

配置步骤

  1. 在n8n中导入工作流JSON文件
  2. 配置Google Sheets节点的认证信息
  3. 在"Run workflow"手动触发节点中设置查询列表
  4. 在Google Sheets节点中选择要保存邮箱的文档和工作表
  5. 测试工作流执行
  6. 调整过滤URL和邮箱的正则表达式以及查询之间的等待时间(可选)

关键参数

参数名称 默认值 说明
amount 2 每次查询执行之间的等待时间(秒)
waitForSubWorkflow false 是否等待子工作流执行完成
mergeLists true 聚合时是否将各页面的邮箱数组合并为单个列表

最佳实践

优化建议

  • 定期更新AI分类模型以提高准确性
  • 根据邮件量调整处理批次大小
  • 设置合理的分类置信度阈值
  • 定期清理过期的分类规则

安全注意事项

  • 妥善保管API密钥和认证信息
  • 限制工作流的访问权限
  • 定期审查处理日志
  • 启用双因素认证保护Google账户

性能优化

  • 使用增量处理减少重复工作
  • 缓存频繁访问的数据
  • 并行处理多个邮件分类任务
  • 监控系统资源使用情况

故障排除

常见问题

邮件未被正确分类

检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。

Google Sheets认证失败

确认Google API凭证有效且具有Google Sheets的权限范围,重新进行OAuth授权。

调试技巧

  • 启用详细日志记录查看每个步骤的执行情况
  • 使用测试邮件验证分类逻辑
  • 检查网络连接和API服务状态
  • 逐步执行工作流定位问题节点

错误处理

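工作流包含以下错误处理机制:

  • "Request web page for URL"节点请求失败时继续执行(onError: continueRegularOutput)
  • "Scrape emails from page"节点出错时继续执行(onError: continueRegularOutput)
  • "Loop over URLs"与"Loop over pages"节点将错误转发到错误输出(onError: continueErrorOutput)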